
****************************************
* Export data to Matlab
****************************************


* Prelims

* Data cleaning
* Load only the observations with post==0.
use if post==0 using "port-pairs-data2.dta", clear

* Discard port pairs with unknown boat size or with zero/unknown TEU traffic.
drop if boat_size_teu==. | traffic_teu==0 | traffic_teu==.

* A system-missing Panama indicator is recoded to 0.
replace XPanama = 0 if XPanama==.

* group2hdfe writes a group identifier into gid; only group 1 is retained
* (per the original note, this is the biggest mobility group).
group2hdfe A_acid D_acid, group(gid)
drop if gid!=1
save /tmp/cleaned, replace

* Make sure that a port exists both as a source and destination.
* Dropping the pairs of a one-sided port can leave other ports one-sided,
* so the same filter is repeated until a pass removes no observations
* (instead of a fixed two rounds that had to be verified by hand).
* Note: this section relies on local macros, so run it as a whole.
*
* Files written: /tmp/tmp and /tmp/ports (scratch), /tmp/cleaned2 (working
* copy, holds the final sample on exit) and /tmp/tmpsample (final sample).

use /tmp/cleaned, clear
save /tmp/cleaned2, replace

local n_dropped = 1
local round = 0
while `n_dropped' > 0 {
    local ++round

    * Total frequency per port on the D_acid side
    use /tmp/cleaned2, clear
    local n_before = _N
    collapse (sum) D_freq=freq, by(D_acid D_port_name D_country)
    save /tmp/tmp, replace

    * Total frequency per port on the A_acid side, keyed as D_acid so the two
    * lists can be merged; only ports present on both sides are kept
    use /tmp/cleaned2, clear
    collapse (sum) A_freq=freq, by(A_acid)
    ren A_acid D_acid
    merge 1:1 D_acid using /tmp/tmp
    keep if _merge==3
    keep D_acid D_freq D_port_name D_country A_freq
    save /tmp/ports, replace

    * Keep only pairs whose D_acid is in the port list, then swap the two id
    * columns to apply the same filter to A_acid, and swap back
    use /tmp/cleaned2, clear
    merge m:1 D_acid using /tmp/ports, keep(matched) nogen
    ren (D_acid A_acid) (A_acid D_acid)
    merge m:1 D_acid using /tmp/ports, keep(matched) nogen
    ren (D_acid A_acid) (A_acid D_acid)
    save /tmp/cleaned2, replace

    * Stop once a full pass leaves the sample unchanged
    local n_dropped = `n_before' - _N
    display as text "Port filter round `round': dropped `n_dropped' observations"
}

save /tmp/tmpsample, replace


* Check that the sample is OK: every port must appear both as D_acid and as
* A_acid. The merges below stop the script with an error if that is not true.

* Number the distinct D_acid ports 1..N (collapse leaves them sorted by D_acid)
use /tmp/tmpsample, clear
collapse (count) A_acid, by(D_acid)
gen Did = _n
keep D_acid Did
save /tmp/id, replace

* Attach the id of the D_acid port
use /tmp/tmpsample, clear
merge m:1 D_acid using /tmp/id, assert(match) nogen

* Swap the roles so the same id list can be attached by A_acid. Any unmatched
* row means a port is missing on one side; assert(match) then aborts.
ren (D_acid A_acid Did) (A_acid D_acid Aid)
merge m:1 D_acid using /tmp/id, assert(match)
ren (D_acid A_acid Aid Did) (A_acid D_acid Did Aid)


****************************************
* Export to matlab
****************************************


* Panama canal indicator
* Load just the two port ids and the indicator, copy the indicator into the
* stub variable t, and spread it into one row per D_acid with one t<A_acid>
* column per A_acid.
use D_acid A_acid XPanama using /tmp/tmpsample, clear
gen t=XPanama
drop XPanama
reshape wide t, i(D_acid) j(A_acid)

* No varlist is given, so outfile writes every variable in memory
* (D_acid followed by the t columns).
outfile using "XPanamamatrix.csv", comma wide replace noquote

* Traffic
* One row per D_acid, one t<A_acid> column per A_acid, holding traffic_dwt.
* NOTE(review): the cleaning step filters on traffic_teu while this export
* uses traffic_dwt -- confirm that is intended.
use /tmp/tmpsample, clear

* Store as double: a plain -gen- would create a float, which cannot hold
* large values exactly.
gen double t = traffic_dwt
keep D_acid A_acid t
sort D_acid A_acid t
reshape wide t, i(D_acid) j(A_acid)

* outfile writes numbers using each variable's display format; the default
* %9.0g would round large values or print them in scientific notation.
format t* %16.0g
outfile using "trafficmatrix.csv", comma wide replace noquote

* Make list of ports
* One row per (D_acid, D_port_name, D_country) with the summed freq.
use D_acid D_port_name D_country freq using /tmp/tmpsample, clear
collapse (sum) freq=freq, by(D_acid D_port_name D_country)

* NOTE(review): strings are written without quotes (noquote), so a port or
* country name containing a comma would shift columns -- confirm none do.
outfile using "portname.csv", comma wide replace noquote

****************************************
* Export data to Matlab: Expenditure by port
****************************************

* Port-level expenditure, restricted to the ports found in /tmp/tmpport.
* NOTE(review): /tmp/tmpport is not written anywhere in the script as shown;
* confirm it is produced by an earlier step, otherwise this merge fails or
* silently picks up a stale file.
use "Expenditure_port.dta", clear

* The merge key is called D_acid in the using file
rename A_acid D_acid

* Keep matched observations only (_merge code 3)
merge 1:1 D_acid using /tmp/tmpport, keep(3)
outfile D_acid traffic_dwt Emanu_port E_port using "expenditure.csv", comma wide replace noquote

* Restore the original key name in memory
rename D_acid A_acid

